********************************************************************************
**	PURPOSE: Survey question variable generation & macro creation in wide data set format by surveyid
**							
**	INPUTS: Index_Wide.dta
**	
**	OUTPUTS: Sample and Baseline Vars.dta
**				
**	NOTES: Originally named 03_Survey.do in archived files
**
** 	CREATED/MODIFIED BY: Nora Gregory, Kayla Wilding, Leah Kim, Hasan Ahamed
**
**	DATE CREATED: 01/04/17
**
**	DATE LAST EDITED: 02/28/2023
********************************************************************************
*0. Open dataset
*1. Create sample flags
*2. Create survey index variables
*3. Create survey indices
*4. Label and output
	clear all
/*******************************************************************************
0. Open data set to add in survey variables
********************************************************************************/
	* Load the wide index file that the survey variables are added to
	use "$adta/Index_Wide", clear

/*******************************************************************************
1. Clean up/Create sample flags
********************************************************************************/
*A. Drop
*B. Flag groups


** Keep only the people that match to the admin data (non-missing depbal1).
** The two asserts pin the expected row counts: 2310 before, 2269 after (41 removed).
	assert _N == 2310
	keep if !missing(depbal1)
	assert _N == 2269


* Indicator flags for the three groups, each followed by a check of its expected size:
*   treatment (n = 789), control (n = 742), and uninterested (n = 738)
	generate flag_cblgroup = (enc == 1) if !missing(enc)
	quietly count if flag_cblgroup == 1
	assert r(N) == 789

	generate flag_extragroup = (enc == 0) if !missing(enc)
	quietly count if flag_extragroup == 1
	assert r(N) == 742

	* A missing enc marks the uninterested group
	generate flag_unintgroup = missing(enc)
	quietly count if flag_unintgroup == 1
	assert r(N) == 738

	* Each of these is 1 when either of the two named flags equals 1, else 0
	generate treatunint = inlist(1, flag_cblgroup, flag_unintgroup)
	generate extraunint = inlist(1, flag_extragroup, flag_unintgroup)

**Create flags for the main samples used in regressions
	* Randomized sample (treatment/cbl group and control/extra step group): enc is non-missing
	generate flag_randomized = !missing(enc)
	quietly count if flag_randomized == 1
	assert r(N) == 1531

	* Randomized sample restricted to those scored at baseline (non-missing ficoscore081);
	* used when ficoscore is the outcome variable
	generate flag_randomized_scored_base = (flag_randomized == 1) & !missing(ficoscore081)
	quietly count if flag_randomized_scored_base == 1
	assert r(N) == 1238

	* Entire study sample (everybody matched to the admin data: cbl/treatment, control/extra
	* step, and uninterested groups). It is a constant 1 so that regressions written as
	* `restriction' == 1 can use it as the sample flag
	generate flag_matched = 1
	quietly count if flag_matched == 1
	assert r(N) == 2269

	* Entire study sample with a ficoscore08 at baseline
	generate flag_matched_scored_base = (flag_matched == 1) & !missing(ficoscore081)

	* ne_ln = 1 when opentradeB1 and openinstallB1 are both 0. It is defined whenever at
	* least one of the two is non-missing, and left missing only when both are missing.
	* NOTE(review): a row with one input missing and the other equal to 0 is coded 0 here;
	* confirm that requiring only one non-missing input is intended.
	generate ne_ln = (opentradeB1 == 0 & openinstallB1 == 0) if !(missing(opentradeB1) & missing(openinstallB1))
	
/*******************************************************************************
2. Create Survey Indexes Variables
********************************************************************************/

** Reorder Likert questions from Q18, so that higher value = worse/less desirable
** behavior/attitude, i.e. Higher Fin Stress = Higher value.
** The items to reverse come from the global revvars.
	foreach item of global revvars {
		* revrs is an external command; the lines below rely on it having created rev<item>
		revrs `item'
		* Original codes above 5 (which includes missing) are set to missing in the reversed copy
		replace rev`item' = . if `item' > 5
		numlabel rev`item', add
	}

	numlabel likert, add
	
** Credit knowledge items from Question 20 (score excluding parts b & c).
** Each *c variable is 1 when the response equals the value tested on its line, 0 for any
** other response, and missing when the response is missing.
	generate creditc = (intcredit == 1) if !missing(intcredit)
	generate healthc = (inthealth == 0) if !missing(inthealth)
	generate agec    = (intage == 0)    if !missing(intage)
	generate borrowc = (intborrow == 1) if !missing(intborrow)
	generate repayc  = (intrepay == 1)  if !missing(intrepay)

** Components of the lack of liquidity index: less than 60 in savings at SLCCU at baseline,
** income below the median, and difficulty getting approved for loans (Q11)
	* Baseline savings at slccu (savingsbal_me is admin (SLCCU) data):
	* a below-60 dummy and its complement
	generate savslccu60less = (savingsbal_me1 < 60) if !missing(savingsbal_me1)
	generate savslccu60more = (savslccu60less == 0) if !missing(savslccu60less)

	* Question 7 - Income (survey data): 1 = strictly below the sample median
	quietly summarize income, detail
	local income_p50 `r(p50)'
	generate incblmed = (income < `income_p50') if !missing(income)

	* Question 11 - Have you had difficulty getting approved for loans in the past three years?
	* apploan (survey var) is copied with code 2 mapped to 0; other values carry over unchanged
	generate appdifficult = cond(apploan == 2, 0, apploan)

** Baseline demographic flags. Every dummy is left missing when the survey answer it is
** built from is missing, so rows with no answer are not silently counted in either category.
	* Less than or equal to 25 years old at baseline
	gen yg25 = (age <= 25) if !mi(age)
	label var yg25 "Younger than or equal to 25 years old"

	* school codes 4 and 5 are flagged as college educated.
	* !mi() is used instead of != . so extended missing codes (.a-.z) also stay missing
	gen college = (school == 4 | school == 5) if !mi(school)
		label var college "College educated"

	* A missing adults answer is used as the marker for a missing baseline survey
	gen svymiss = mi(adults)
		label var svymiss "Missing Baseline Survey"

	* income codes 1-3 are flagged as household income under 30k.
	* The missing-income guard is required: without it a missing income was coded
	* inclt30 = 0, which in turn made incgt30 = 1 (i.e. "greater than 30k") for rows with
	* no income answer, and made the !mi(inclt30) guard on incgt30 a no-op.
	gen inclt30 = inlist(income, 1, 2, 3) if !mi(income)
		label var inclt30 "HH income less than 30k"
	gen incgt30 = (inclt30 == 0) if !mi(inclt30)
		label var incgt30 "HH income greater than 30k"
		
	* Baseline fico score bands: fs081_X00 = 1 when ficoscore081 lies between X00 and X99
	* (inclusive) and 0 otherwise. A missing score is 0 in every band.
	forvalues lo = 400(100)800 {
		local hi = `lo' + 99
		generate fs081_`lo' = inrange(ficoscore081, `lo', `hi')
		label variable fs081_`lo' "1 = Fico Score in the `lo's at baseline"
	}

	* ihstrans is an external command applied to four baseline credit variables.
	* NOTE(review): presumably it creates the ihs* variables kept at the end of this file - confirm
	ihstrans openinstalltr1 inquiry121 openrevolvtr1 usecreditoptrst1

/*******************************************************************************
3. Create Survey Indexes 
********************************************************************************/
 * Create standardized indexes using z-score

***This program uses the mean and sd of the control group to standardize a var or an index "x".
*   Usage:   stan x
*   Creates: z_x = (x - mean)/sd, where mean and sd are computed over the rows with
*            flag_extragroup == 1 (flag_extragroup must already exist in the data)
*   Any earlier definition is dropped first so this section can be re-run in the same
*   session without the "already defined" error.
capture program drop stan
program define stan 
	args x 
		
	* Control-group moments of x
	qui sum `x' if flag_extragroup == 1
	local mean `r(mean)'
	local sd `r(sd)'
	* z-score for every row, not only the control group
	gen z_`x' = (`x' - `mean')/`sd'
end

	*Loop through all indexes that use only baseline/constant variables and standardize
	*each component variable once (list uniq removes variables shared by several indexes).
	*The credstatusknow and credac2 component lists are appended so that every z_ variable
	*referenced by the rowmean loop below is guaranteed to exist; appending them last keeps
	*the creation order of the previously standardized variables unchanged.
	local indices $insecurity $selfcont $risk $credstatus $credknow $liquid $credac $credac_rev $credstatusknow $credac2
	foreach var in `:list uniq indices' {
		stan `var'
	}

	*For each index: collect the z_ versions of its components (taken from the global of
	*the same name), average them row-wise into <index>_i, then standardize that average
	*into z_<index>_i.
	foreach index in insecurity selfcont risk credstatus credknow credstatusknow liquid credac credac2 credac_rev {
		* Start from an empty list so a leftover local from an earlier run cannot leak in
		local z_`index'
		foreach var in $`index' { 
			local z_`index' `z_`index'' z_`var'
		}
		egen `index'_i = rowmean(`z_`index'')
		stan `index'_i
	}

*Split the installment credit access index at baseline into terciles to create
* heterogeneous groups. Tercile cut points are computed on the randomized sample only,
* so the three dummies are missing outside that sample.
	xtile credac3ile = z_credac_i if flag_randomized == 1, nquantiles(3)

	generate htca = (credac3ile == 3) if !missing(credac3ile)
	generate ltca = (credac3ile == 1) if !missing(credac3ile)
	generate mtca = (credac3ile == 2) if !missing(credac3ile)

*Same tercile split for the new version of the installment credit access index at baseline
	xtile credac3ile2 = z_credac2_i if flag_randomized == 1, nquantiles(3)

	generate htca2 = (credac3ile2 == 3) if !missing(credac3ile2)
	generate ltca2 = (credac3ile2 == 1) if !missing(credac3ile2)
	generate mtca2 = (credac3ile2 == 2) if !missing(credac3ile2)

	
/*******************************************************************************
4. Label and output
********************************************************************************/
***Label variables 
* Dollar amounts inside the label text are written with a backslash before the dollar
* sign. Without it Stata reads the dollar sign plus the following characters as a global
* macro reference, which expands to nothing and removes the amount from the stored label.
	*label sample flags
	la var 	flag_cblgroup 				"CBL Group"
	la var 	flag_unintgroup				"Uninterested Group"
	la var 	flag_extragroup 			"Extra Step Group"
	la var 	flag_randomized 			"Randomized group (cbl and extra step)"
	la var 	flag_randomized_scored_base	"Randomized group with fico score at baseline"
	la var 	flag_matched 				"Sample matched to admin data"
	la var 	flag_matched_scored_base 	"Sample matched to admin data with fico score at baseline"
	la var 	htca				"1 = Highest Tercile of Installment Credit Activity at Baseline"
	la var  ltca				"1 = Lowest Tercile of Installment Credit Activity at Baseline"
	
	*label survey variables
	la var 	revbvstress 		"Q18a finances = stress"
	la var	bvafford 			"Q18b consider before buy"
	la var	revbvtoday 			"Q18c live for today"	
	la var 	revbvbills 			"Q18d hard covering bills"
	la var 	bvneed				"Q18e can come up w \$2k (higher values = less likely)"
	la var  revbvneed 			"Q18e can come up w \$2k (higher values = more likely)"
	la var 	revbvrisk 			"Q18f take risks"
	la var 	bvlongterm 			"Q18g set goals"
	la var 	revbvregret 		"Q18h wish disciplined w $"
	la var 	revbvhquality 		"Q18i demand high quality"
	la var	revbvtask 			"Q18j trouble finishing tasks"
	la var	bvluck				"Q18k success = hard work"
	la var 	finsit 				"Q17: Overal fin. sit. (higher values = worse)"
	la var 	revfinsit			"Q17: Overal fin. sit. (higher values = better)"
	la var 	appdifficult 		"Q11 difficulty getting approved for loan in past 3yrs"
	la var 	incblmed 			"Below med. income (<\$20k)"
	la var 	savslccu60less 		"<\$60 in savings at slccu at baseline"
	* savslccu60more is the complement of "less than 60", so it includes exactly 60
	la var 	savslccu60more 		">=\$60 in savings at slccu at baseline"
	
	*label baseline survey indexes 
	la var z_insecurity_i 		"Insecurity index (standardized)"
	la var z_selfcont_i 		"Self-Control index (standardized)"
	la var z_risk_i 			"Risk Taking scale (standardized)"
	la var z_credstatus_i 		"Attention to Credit Status index (standardized)"
	la var z_credknow_i 		"Credit Process Knowledge index (standardized)"
	la var z_liquid_i 			"Lack of liquidity index (standardized)"
	la var z_credac_i 		"Installment Credit Access at Baseline Index (standardized)"


* Restrict the file to the identifier, the index components and standardized indices,
* the sample/group flags, and the baseline outcomes and characteristics named below
keep surveyid enc svymiss ne_ln race_white ///
	$liquid $insecurity $selfcont $risk $credstatus $credknow ///
	$indices_s ///
	$groupflags $baselineoutcomes $baselinechars ///
	$htegroups $htegroups2 ///
	fs081_*00 savslccu60less savslccu60more incgt30 inclt30 ///
	open_sd* ihs*

*Save data set out as wide version with the survey indices 
	save "$adta/Sample and Baseline Vars.dta", replace

**EOF**
